package com.zhang.crawler.crawler.yicheng;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhang.crawler.crawler.utils.HttpRequestUtils;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Crawls expert (specialist) records from www.chinaliankeji.com, page by page, and prints
 * the collected records as one JSON array once all pages have been requested.
 *
 * <p>All state is held in a static accumulator and every request is preceded by a fixed
 * pause, so the class is meant to be run single-threaded through {@link #main(String[])}.
 *
 * @author zhanghaidong
 * @version v1.0
 * @description Crawl expert data
 * @date 2019/5/7
 */
public class SpecialistList {

    /** Number of list pages requested by {@link #main(String[])} (page indexes 0..22). */
    private static final int PAGE_COUNT = 23;

    /** Pause, in milliseconds, taken before every HTTP request issued by this class. */
    private static final long REQUEST_DELAY_MILLIS = 10000L;

    /** Endpoint that returns one page of the expert list. */
    private static final String LIST_URL = "http://www.chinaliankeji.com/specialist/specialistList";

    /** Prefix of the expert detail page URL; the expert id is appended to it. */
    private static final String DETAIL_URL_PREFIX = "http://www.chinaliankeji.com/specialist/1/";

    /** Captures the inner markup of the first achievements block of a detail page. */
    private static final Pattern ACHIEVEMENT_PATTERN =
            Pattern.compile("<div class=\"main-left-con-zycj\">([\\s\\S]*?)</div>");

    /**
     * String fields copied from a list entry into the output record, as
     * {@code {sourceKey, outputKey}} pairs.
     */
    private static final String[][] FIELD_MAPPING = {
            {"unit", "unit"},                 // school / institution
            {"researcharea", "researchArea"}, // research direction
            {"personalinfo", "personalInfo"}, // detailed introduction
            {"imagename", "imageName"},       // avatar image
            {"pname", "name"},                // person name
            {"addrprovince", "addrProvince"}, // region
            {"transform", "transform"},       // expert type
            {"ptransform", "pTransform"},     // professional title
            {"wtransform", "wTransform"},     // field of work
    };

    /** Accumulates one JSON object per crawled expert across all pages. */
    private static final JSONArray jsonArray = new JSONArray();

    /**
     * Requests every list page in order, pausing before each one, then prints the
     * accumulated records.
     *
     * @param args unused
     * @throws InterruptedException if the thread is interrupted while pausing between pages
     */
    public static void main(String[] args) throws InterruptedException {
        for (int page = 0; page < PAGE_COUNT; page++) {
            Thread.sleep(REQUEST_DELAY_MILLIS);
            special(String.valueOf(page));
        }
        System.out.println("--------------get end-------------------------");
        System.out.println(jsonArray);
    }

    /**
     * Fetches one page of the expert list and appends a record for each entry, including
     * the achievements text scraped from the expert's detail page, to the shared accumulator.
     *
     * <p>Failures are reported on the console and end processing of this page only. If the
     * thread is interrupted while pausing, the interrupt flag is restored and the method
     * returns so that the caller can observe the interruption.
     *
     * @param pageIndex zero-based page index, sent as the {@code p} request parameter
     */
    public static void special(String pageIndex) {
        JSONObject param = new JSONObject();
        param.put("status", "1");
        param.put("p", pageIndex);
        param.put("num", "3");
        // NOTE(review): this value is already percent-encoded; confirm HttpRequestUtils
        // does not encode it a second time.
        param.put("sortthe", "%E9%BB%98%E8%AE%A4%E6%8E%92%E5%BA%8F");

        try {
            String response = HttpRequestUtils.httpPost(LIST_URL, buildListHeaders(), param);
            // "-1" is treated as a failed request, exactly like a null response.
            if (response == null || "-1".equals(response)) {
                return;
            }

            JSONObject body = JSONObject.parseObject(response);
            JSONArray list = (body == null) ? null : body.getJSONArray("list");
            if (list == null) {
                // Without this guard a response lacking "list" aborted the page with an NPE.
                System.err.println("No \"list\" array in response for page " + pageIndex);
                return;
            }

            for (int i = 0; i < list.size(); i++) {
                JSONObject entry = list.getJSONObject(i);
                if (entry == null) {
                    continue;
                }
                jsonArray.add(toRecord(entry));
            }
        } catch (InterruptedException e) {
            // Restore the flag instead of swallowing it so the caller's next blocking call sees it.
            Thread.currentThread().interrupt();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Downloads the detail page of one expert and extracts the text of its achievements block.
     *
     * @param id expert id, appended to the detail page URL
     * @return the trimmed achievements text with {@code <span>}/{@code <p>} tags and the
     *         section heading removed, or an empty string when the request fails, the block
     *         is not found, or an exception occurs
     */
    public static String getAchieveMent(int id) {
        try {
            // NOTE(review): the original built a set of browser-like headers here but never
            // passed them on; the two null arguments are kept so the request is unchanged.
            // Confirm against HttpRequestUtils whether headers were meant to be sent.
            String response = HttpRequestUtils.httpGet(DETAIL_URL_PREFIX + id, null, null, false);
            if (response == null || "-1".equals(response)) {
                return "";
            }

            Matcher matcher = ACHIEVEMENT_PATTERN.matcher(response);
            if (!matcher.find()) {
                return "";
            }

            String rawBlock = matcher.group(1);
            System.out.println(rawBlock);

            String result = rawBlock
                    .replace("<span>", "")
                    .replace("</span>", "")
                    .replace("主要成就", "")
                    .replace("<p>", "")
                    .replace("</p>", "")
                    .trim();
            System.out.println(result);
            return result;
        } catch (Exception e) {
            e.printStackTrace();
            return "";
        }
    }

    /**
     * Builds the request headers sent with the list request.
     * Do not add Content-Type or Cookie headers here.
     */
    private static JSONObject buildListHeaders() {
        JSONObject header = new JSONObject();
        header.put("Accept", "*/*");
        header.put("Accept-Encoding", "gzip, deflate");
        header.put("Accept-Language", "zh-CN,zh;q=0.9");
        header.put("Host", "www.chinaliankeji.com");
        header.put("Origin", "www.chinaliankeji.com");
        header.put("Referer", "https://www.chinaliankeji.com/expert");
        header.put("User-Agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36");
        header.put("X-Requested-With", "XMLHttpRequest");
        return header;
    }

    /**
     * Converts one list entry into an output record, pausing and then fetching the
     * expert's achievements from the detail page.
     */
    private static JSONObject toRecord(JSONObject entry) throws InterruptedException {
        JSONObject json = new JSONObject();
        for (String[] mapping : FIELD_MAPPING) {
            json.put(mapping[1], entry.getString(mapping[0]));
        }

        // Expert id, also used to locate the detail page.
        int id = entry.getIntValue("id");

        // Pause before the detail request, as before every other request.
        Thread.sleep(REQUEST_DELAY_MILLIS);
        json.put("achieveMent", getAchieveMent(id));

        json.put("id", id);
        return json;
    }
}
